************************************************************************;
*This do file calculates the uncertainty of the empirical moments using the non-parametric bootstrap method;
*The 100 bootstrap repetitions, clustered at the school level, are used to compute the standard errors of the structural model;
************************************************************************;

use $scratch/final_data_for_model.dta, clear

egen temp = group(idschool) 
sum temp 
local numb_schools = r(max) 
display `numb_schools' 
drop temp

*seed for resampling the data with replacement;
local seed = 123

clear all
set obs 1
forvalues j = 1(1)36{
gen mom`j' = .
}

save collect_moments_boot.dta, replace

*Loop over 100 bootstrap repetitions;
forvalues b = 1(1)100{

set output proc
di "Bootstrap # `b'"
set output error

use $scratch/final_data_for_model.dta, clear

local seed_here = `seed' + `b' 
set seed `seed_here' 

*clustered bootstrap (at the school level)
bsample `numb_schools', cluster(idschool)

recode H1WP2 (0=1) (1=0) , gen(authoritarian_parents)

factor cogn1_tilde cogn2_tilde cogn3_tilde cogn4_tilde cogn5_tilde, factors(1)
predict skills_hat if e(sample), bartlett
qui sum skills_hat if class==9
replace skills_hat = (skills-r(mean))/r(sd)


*Parental Investments;

factor    inv4 inv6   inv9  , factors(1)
predict inv if e(sample) , bartlett

factor    inv4_t2 inv6_t2   inv9_t2  , factors(1)
predict inv_t2 if e(sample) , bartlett

local ind = 1
foreach x in H1WP1 H1WP2 H1WP3  H1WP6  {
recode `x' (0=1) (1=0), gen(author_proxy`ind')
local ++ind
}

local ind = 1
foreach x in  H2WP1 H2WP2 H2WP3  H2WP6  {
recode `x' (0=1) (1=0), gen(author_proxy`ind'_t2)
local ++ind
}


factor author_proxy1 author_proxy2 author_proxy3 author_proxy4  , factors(1) ml
predict author_factor if e(sample), bartlett
qui sum author_factor, de
gen author_factor_sd = (author_factor-r(mean))/r(sd)

factor author_proxy1_t2 author_proxy2_t2 author_proxy3_t2 author_proxy4_t2  , factors(1) ml
predict author_factor_t2 if e(sample), bartlett
qui sum author_factor_t2, de
gen author_factor_t2_sd = (author_factor_t2-r(mean))/r(sd)


forvalues j = 1(1)5{
gen mean_peer_cogn`j'_tilde = (mean_peer_cogn`j'-mu_cogn`j')/lambda_cogn`j'
}

forvalues j = 2(1)5{
gen mean_peer_cogn`j'_tilde_t2 = (mean_peer_cogn`j'_t2 - mu_cogn`j')/lambda_cogn`j'
}


factor mean_peer_cogn2_tilde mean_peer_cogn1_tilde   mean_peer_cogn3_tilde mean_peer_cogn4_tilde mean_peer_cogn5_tilde, factors(1)
predict peers_skills_hat if e(sample), bartlett
qui sum peers_skills_hat
replace peers_skills_hat = (peers_skills_hat-r(mean))/r(sd)

factor mean_peer_cogn2_tilde_t2  mean_peer_cogn3_tilde_t2 mean_peer_cogn4_tilde_t2 mean_peer_cogn5_tilde_t2, factors(1)
predict peers_skills_t2_hat if e(sample), bartlett
qui sum peers_skills_t2_hat
replace peers_skills_t2_hat = (peers_skills_t2_hat-r(mean))/r(sd)


forvalues j = 2(1)5{
gen cogn`j'_tilde_t2 = (cogn`j'_t2 - mu_cogn`j')/lambda_cogn`j'
}

factor cogn2_tilde_t2  cogn3_tilde_t2 cogn4_tilde_t2 cogn5_tilde_t2, factors(1)
predict skills_t2_hat if e(sample), bartlett


qui sum author_factor_sd, de 
gen d_author_factor_median = ( author_factor_sd > r(p50)) if author_factor_sd!=.
qui sum author_factor_t2_sd, de 
gen d_author_factor_median_t2 = ( author_factor_t2_sd > r(p50)) if author_factor_t2_sd!=.

gen delta_author_factor = author_factor_t2_sd - author_factor_sd

gen delta_author = d_author_factor_median_t2 - d_author_factor_median
gen delta_inv = inv_t2 - inv
gen delta_skills = skills_t2_hat - skills_hat
gen delta_peer_skills = peers_skills_t2_hat - peers_skills_hat


preserve

keep if school_size>=200
keep if class >=9 & class<=12
*Keep main High-Schools in the sample;
keep if  idschool<100 | (idschool>=200 & idschool<300) 



********************************;
*1) Mean of PS in the sample;
********************************;

sum authoritarian_parents if class<=11
gen mom1  = r(mean)

********************************;
*2) Regression of PS on state variables;
********************************;

xtset idgrade
xtreg d_author_factor_median skills_hat peers_skills_hat , fe 
*xtreg author_factor skills_hat peers_skills_hat , fe 

gen mom2  = _b[skills_hat]
gen mom3  = _b[peers_skills_hat]

************************************************;
* Dynamic Effects;
************************************************;

xtreg skills_t2_hat skills_hat peers_skills_hat d_author_factor_median  if class<=11 , fe vce(cluster idschool)

gen mom4  = _b[skills_hat]
gen mom5  = _b[peers_skills_hat]
gen mom6  = _b[d_author_factor_median]

xtreg skills_t2_hat skills_hat peers_skills_hat  if d_author_factor_median==0   & class<=11 , fe vce(cluster idschool)

gen mom7  = _b[skills_hat]
gen mom8  = _b[peers_skills_hat]

xtreg skills_t2_hat skills_hat peers_skills_hat  if d_author_factor_median==1   & class<=11 , fe vce(cluster idschool)

gen mom9  = _b[skills_hat]
gen mom10  = _b[peers_skills_hat]

keep mom*
keep in 1


save temp_collect_moments_boot.dta, replace

restore


preserve

keep if class >=9 & class<=11
*Keep only saturated High-Schools in the sample;
keep if saturated==1

xtset idgrade

foreach x in skills_hat peers_skills_hat{
gen `x'_miss = `x' 
qui sum `x'
replace `x'_miss = r(mean) if `x'==.
gen d_`x'_miss = (`x'==.)
}

xtreg peers_skills_t2_hat  skills_hat_miss peers_skills_hat_miss d_skills_hat_miss d_peers_skills_hat_miss d_author_factor_median , fe vce(cluster idschool)
gen mom11  = _b[skills_hat]
gen mom12  = _b[peers_skills_hat]
gen mom13  = _b[d_author_factor_median]

xtreg peers_skills_t2_hat skills_hat_miss peers_skills_hat_miss d_skills_hat_miss d_peers_skills_hat_miss  if d_author_factor_median==0 , fe vce(cluster idschool)
gen mom14  = _b[skills_hat]
gen mom15  = _b[peers_skills_hat]

xtreg peers_skills_t2_hat skills_hat_miss peers_skills_hat_miss d_skills_hat_miss d_peers_skills_hat_miss  if d_author_factor_median==1 , fe vce(cluster idschool)
gen mom16  = _b[skills_hat]
gen mom17  = _b[peers_skills_hat]

keep mom*
keep in 1

append using temp_collect_moments_boot.dta

save temp_collect_moments_boot.dta, replace

restore

preserve
keep if school_size>=200
keep if class >=9 & class<=12
*Keep main High-Schools in the sample;
keep if  idschool<100 | (idschool>=200 & idschool<300) 

xtset idgrade

*5) Regression of Investments on state variables conditioning on Parenting Style=0;
xtreg inv skills_hat peers_skills_hat if d_author_factor_median==0 & class<=11, fe vce(cluster idschool)
gen mom18  = _b[skills_hat]
gen mom19  = _b[peers_skills_hat]

*6) Regression of Investments on state variables conditioning on Parenting Style=1;
xtreg inv skills_hat peers_skills_hat if d_author_factor_median==1 & class<=11 , fe vce(cluster idschool)
gen mom20  = _b[skills]
gen mom21  = _b[peers_skills]



*7) Dynamics of Skills;
egen skills_mean = rowmean(cogn1_tilde cogn2_tilde  cogn4_tilde cogn5_tilde)


local ind_mom = 22
forvalues j = 9(1)12{
qui sum skills_mean if class==`j'
gen mom`ind_mom'  = r(mean)
local ind_mom = `ind_mom ' + 1  
}

qui sum inv if class<=11
gen inv_mean0 = inv - r(mean) if class<=11

*8) Mean Parental Investments conditioning on Parenting Style=0;
sum inv_mean0 if authoritarian_parents==0
gen mom`ind_mom'  = r(mean)
local ind_mom = `ind_mom ' + 1 

*9) Mean Parental Investments conditioning on Parenting Style=1;
sum inv_mean0 if authoritarian_parents==1
gen mom`ind_mom'  = r(mean)
local ind_mom = `ind_mom ' + 1

*10) Mean Number of Friends;
egen n_friends = rowmax( n_peers_cogn1 n_peers_cogn2 n_peers_cogn3 n_peers_cogn4 n_peers_cogn5)
sum n_friends if n_friends>0 & class<=11 & inv!=. & peers_skills_hat!=.
gen mom`ind_mom'  = r(mean)
local ind_mom = `ind_mom ' + 1

keep mom*
keep in 1

append using temp_collect_moments_boot.dta

save temp_collect_moments_boot.dta, replace

restore

preserve

keep if class >=9 & class<=11
keep if saturated==1


reg delta_author delta_skills  delta_peer_skills ,  vce(cluster idschool) 
gen mom`ind_mom'  = _b[delta_skills]
local ind_mom = `ind_mom ' + 1
gen mom`ind_mom'  = _b[delta_peer_skills]
local ind_mom = `ind_mom ' + 1


reg delta_inv  authoritarian_parents  ,  vce(cluster idschool)
gen mom`ind_mom'  = _b[_cons]
local ind_mom = `ind_mom ' + 1
gen mom`ind_mom'  = _b[authoritarian_parents]
local ind_mom = `ind_mom ' + 1


append using temp_collect_moments_boot.dta

save temp_collect_moments_boot.dta, replace

restore





*************************************************;
*Initial Conditions;
*************************************************;

preserve


egen mean_skills_school_9 = mean(skills_hat) if class==9, by(idschool)
egen sd_skills_school_9 = sd(skills_hat) if class==9, by(idschool)

gen n_kids = 1 
collapse  mean_skills_school_9 sd_skills_school_9  (count) n_kids if class==9 & school_size>200, by(idschool)

xtile quartiles_schools = mean_skills_school, nquantiles(4)

drop if quartiles_schools==.

drop if n_kids<=10
bysort quartiles_schools: sum mean_skills_school_9 , de 
bysort quartiles_schools: sum sd_skills_school_9 , de 
bysort quartiles_schools: sum n_kids , de 


gen mean_neighborhood = .
gen sd_neighborhood   = .


forvalues j = 1(1)4{

qui sum mean_skills_school_9 if quartiles_schools==`j'
replace mean_neighborhood = r(mean) in `j'

qui sum sd_skills_school_9 if quartiles_schools==`j'
replace sd_neighborhood = r(mean) in `j'

}

keep mean_neighborhood  sd_neighborhood 
keep in 1/4

outsheet using "$matlab_dir_boot\initial_conditions_boot`b'.txt", nonames replace


restore

*Generate set of neighborhoods based on the distribution of skills among schools;
preserve
egen mean_skills_school_9 = mean(skills_hat) if class==9, by(idschool)
egen sd_skills_school_9 = sd(skills_hat) if class==9, by(idschool)

gen n_kids = 1 
collapse  mean_skills_school_9 sd_skills_school_9  (count) n_kids if class==9 & school_size>200, by(idschool)

xtile quartiles_schools = mean_skills_school, nquantiles(4)

drop if quartiles_schools==.

keep quartiles_schools idschool

save $scratch\schools_neighborhoods_id_boot.dta, replace

restore


preserve

keep if school_size>=200
keep if class >=9 & class<=12
*Keep main High-Schools in the sample;
keep if  idschool<100 | (idschool>=200 & idschool<300) 
drop _merge
merge m:1 idschool using $scratch\schools_neighborhoods_id_boot.dta
drop if _merge==2
drop if quartiles_schools==.

collapse authoritarian_parents if class<=11, by(quartiles_schools)
sort quartiles_schools 
keep authoritarian_parents
xpose, clear

local ind = 33
forvalues j = 1(1)4{

rename v`j' mom`ind'
local ++ind
}

append using temp_collect_moments_boot.dta

save temp_collect_moments_boot.dta, replace

restore



*******************************************;
* Average Family Income in Neighborhoods;
*******************************************;


preserve

keep if school_size>=200
keep if class >=9 & class<=12
*Keep main High-Schools in the sample;
keep if  idschool<100 | (idschool>=200 & idschool<300) 
drop _merge
merge m:1 idschool using $scratch\schools_neighborhoods_id_boot.dta
drop if _merge==2
drop if quartiles_schools==.


collapse real_family_income , by(quartiles_schools)

keep real_family_income

xpose, clear

outsheet using "$matlab_dir_boot\family_income_neighborhoods_boot`b'.txt", nonames replace

restore



use temp_collect_moments_boot.dta, clear

collapse mom*

order mom*, sequential


gen b = `b'

append using collect_moments_boot.dta

save collect_moments_boot.dta, replace
}


***********************************************************;
*Store the bootstrap distribution of the empirical moments;
***********************************************************;

use collect_moments_boot.dta, clear
drop if b==.
sort b
drop b

outsheet using "$matlab_dir_boot\moments_boot.txt", nonames replace


************************************************;
*Compute the variance of the empirical moments;
************************************************;

collapse (sd) mom*
forvalues j = 1(1)36{
rename mom`j' sd_mom`j'
gen var_mom`j' = sd_mom`j'^(2)
}
keep var_*
xpose, clear

outsheet using "$matlab_dir_boot\variance_moments.txt", nonames replace

